function [pmle se condition] = PMLE(Y,X,pmle0)
%% Calculates PMLE point estimator and standard error.
% inputs: Y = n-by-n array of outcomes; X = d-dimensional cell of n-by-n matrices of regressors; initial value (gmm0).
% outputs: point estimate (pmle) and standard error (se).

[pmle, condition, numiter, se] = Newton(@Likelihood,pmle0,Y,X);

function [likelihood score Hessian Se] = Likelihood(psi,Y,X)
%% setup
% dimensions
[n m] = size(Y  ); % sample size
[K D] = size(psi); % number of regressors, number of GMM estimators (different sets of moments) 
nn = nchoosek(n,2); mm = nchoosek(m,2); rho = nn*mm;
% variable definitions
I = zeros(n,m); for k = 1:K, I    = I + X{k}*psi(k); end
T = zeros(n,m); for k = 1:K, T    =          exp(I); end
V =  cell(K,1); for k = 1:K, V{k} =         T.*X{k}; end
% variable definitions for normalization
YY = zeros(n-1,m);              YY    =     Y(2:end,:)   ;
XX =  cell(K  ,1); for k = 1:K, XX{k} =  X{k}(2:end,:)   ; end
II = zeros(n-1,m); for k = 1:K, II    = II + XX{k}*psi(k); end
TT = zeros(n-1,m);              TT    =           exp(II);
VV =  cell(K  ,1); for k = 1:K, VV{k} =         TT.*XX{k}; end
%% estimate fixed effects for given psi
% estimate gamma given psi
gamma = NewtonRaphsonMax2(@FixedEffectLikelihoodGamma,ones(1,m),Y,X,psi);
if sum(isnan(gamma))>0, 
    %likelihood{1} = NaN; score{1}=zeros(2,1); Hessian{1} = zeros(2,2); 
    %mVar=zeros(2,2); Se=zeros(2,1); asyvar = zeros(2,2);
    likelihood = NaN; score=zeros(2,1); Hessian = zeros(2,2); 
    mVar=zeros(2,2); Se=zeros(2,1); asyvar = zeros(2,2);
    return; 
end;
% estimate alpha given gamma, psi
alpha = log(sum(YY,2))-log(sum(exp(II+ones(n-1,1)*gamma),2)); alpha = [0; alpha]; % alpha_1 = 1 (normalized)
%% profiled likelihood
L = Y.*(I+alpha*ones(1,m)+ones(n,1)*gamma)-exp(I+alpha*ones(1,m)+ones(n,1)*gamma); L = sum(sum(L));
%% derivatives of alpha,gamma wrt psi for profiling in score vector
% sensitivity of alpha
dalpha_dgamma = -           exp(II+alpha(2:end)*ones(1,m)+ones(n-1,1)*gamma)   ./(sum(YY,2)*ones(1,m)); % (n-1) x m
dalpha_dpsi1  = - sum(XX{1}.*exp(II+alpha(2:end)*ones(1,m)+ones(n-1,1)*gamma),2)./ sum(YY,2)           ; % (n-1) x 1
dalpha_dpsi2  = - sum(XX{2}.*exp(II+alpha(2:end)*ones(1,m)+ones(n-1,1)*gamma),2)./ sum(YY,2)           ; % (n-1) x 1

H = zeros(m,m);
for j1 = 1:m,
    for j2 = 1:m,
        cross = exp(II(:,j1)+alpha(2:end)+gamma(j1)).*exp(II(:,j2)+alpha(2:end)+gamma(j2))./sum(YY,2);
        own   = exp( I(:,j1)+alpha(1:end)+gamma(j1))                                                 ;
        H(j1,j2) = -(-sum(cross)+sum(own).*(j1==j2))                                                 ;
    end
end

J1 = - sum(X{1}.*exp(I+alpha*ones(1,m)+ones(n,1)*gamma)) - (dalpha_dpsi1'*exp(II+alpha(2:end)*ones(1,m)+ones(n-1,1)*gamma)); % 1 x m
J2 = - sum(X{2}.*exp(I+alpha*ones(1,m)+ones(n,1)*gamma)) - (dalpha_dpsi2'*exp(II+alpha(2:end)*ones(1,m)+ones(n-1,1)*gamma)); % 1 x m
dgamma_dpsi1 = - inv(H)*J1'; % m x 1
dgamma_dpsi2 = - inv(H)*J2'; % m x 1
%% profile score vector
S1 = sum(sum((Y-exp(I+alpha*ones(1,m)+ones(n,1)*gamma)).*X{1}))... % direct effect
  + sum(dalpha_dpsi1.*sum(YY-exp(II+alpha(2:end)*ones(1,m)+ones(n-1,1)*gamma),2))... % effect through alpha
  + sum((dalpha_dgamma*dgamma_dpsi1).*sum(YY-exp(II+alpha(2:end)*ones(1,m)+ones(n-1,1)*gamma),2))... % effect through alpha-gamma
  + sum(sum((Y-exp(I+alpha*ones(1,m)+ones(n,1)*gamma))).*dgamma_dpsi1');

S2 = sum(sum((Y-exp(I+alpha*ones(1,m)+ones(n,1)*gamma)).*X{2}))... % direct effect
  + sum(dalpha_dpsi2.*sum(YY-exp(II+alpha(2:end)*ones(1,m)+ones(n-1,1)*gamma),2))... % effect through alpha
  + sum((dalpha_dgamma*dgamma_dpsi2).*sum(YY-exp(II+alpha(2:end)*ones(1,m)+ones(n-1,1)*gamma),2))... % effect through alpha-gamma
  + sum(sum((Y-exp(I+alpha*ones(1,m)+ones(n,1)*gamma))).*dgamma_dpsi2');

S = [S1; S2];

%% derivatives of alpha,gamma wrt psi for profiling in Hessian matrix
MU   = exp(I +alpha*ones(1,m)       +ones(n,1)  *gamma);
MUMU = exp(II+alpha(2:end)*ones(1,m)+ones(n-1,1)*gamma);

dalpha_dpsi1dpsi1 = sum(MUMU.*(XX{1}+(dalpha_dpsi1*ones(1,m))),2)./sum(YY,2);
dalpha_dpsi2dpsi2 = sum(MUMU.*(XX{2}+(dalpha_dpsi2*ones(1,m))),2)./sum(YY,2);
dalpha_dpsi1dpsi2 = sum(MUMU.*(XX{1}+(dalpha_dpsi2*ones(1,m))),2)./sum(YY,2);
dalpha_dpsi2dpsi1 = sum(MUMU.*(XX{2}+(dalpha_dpsi1*ones(1,m))),2)./sum(YY,2);


dLdgammadpsi1dpsi1 = -sum(MU.*X{1}.*X{1})-sum(MUMU.*XX{1}.*(dalpha_dpsi1*ones(1,m)))-sum(MUMU.*(XX{1}+dalpha_dpsi1dpsi1*ones(1,m)).*(dalpha_dpsi1*ones(1,m)));
dLdgammadpsi1dpsi2 = -sum(MU.*X{1}.*X{2})-sum(MUMU.*XX{1}.*(dalpha_dpsi2*ones(1,m)))-sum(MUMU.*(XX{2}+dalpha_dpsi1dpsi2*ones(1,m)).*(dalpha_dpsi1*ones(1,m)));
dLdgammadpsi2dpsi1 = -sum(MU.*X{2}.*X{1})-sum(MUMU.*XX{2}.*(dalpha_dpsi1*ones(1,m)))-sum(MUMU.*(XX{1}+dalpha_dpsi2dpsi1*ones(1,m)).*(dalpha_dpsi2*ones(1,m)));
dLdgammadpsi2dpsi2 = -sum(MU.*X{2}.*X{2})-sum(MUMU.*XX{2}.*(dalpha_dpsi2*ones(1,m)))-sum(MUMU.*(XX{2}+dalpha_dpsi2dpsi2*ones(1,m)).*(dalpha_dpsi2*ones(1,m)));


dLdgammadgammadpsi1 = zeros(m,m);
for j1 = 1:m,
    for j2 = 1:m,
        cross = MUMU(:,j1).*(XX{1}(:,j1)+dalpha_dpsi1).*dalpha_dgamma(:,j2);
        own1  =   MU(:,j1).*X{1}(:,j1)                                     ;
        own2  = MUMU(:,j1).*dalpha_dgamma(:,j1)                            ;
        
        dLdgammadgammadpsi1(j1,j2) = -(-sum(cross)+(sum(own1)+sum(own2)).*(j1==j2));
    end
end

dLdgammadgammadpsi2 = zeros(m,m);
for j1 = 1:m,
    for j2 = 1:m,
        cross = MUMU(:,j1).*(XX{2}(:,j1)+dalpha_dpsi2).*dalpha_dgamma(:,j2);
        own1  =   MU(:,j1).*X{2}(:,j1)                                     ;
        own2  = MUMU(:,j1).*dalpha_dgamma(:,j1)                            ;
        
        dLdgammadgammadpsi2(j1,j2) = -(-sum(cross)+(sum(own1)+sum(own2)).*(j1==j2));
    end
end

dLdgammadgammadgamma = zeros(m,m,m);
for j1 = 1:m,
    for j2 = 1:m,
        for j3 = 1:m,
            first  =    sum((MUMU(:,j1).*MUMU(:,j2))*((j1==j3)+(j2==j3))./sum(YY,2));
            second = 2* sum( MUMU(:,j1).*MUMU(:,j2).*dalpha_dgamma(:,j3)./sum(YY,2));
            third  =   -sum( MUMU(:,j1).*(dalpha_dgamma(:,j3)+(j1==j3))*(j1==j2))   ;
            fourth  =  -sum(   MU(:,j1).*((j1==j3))*(j1==j2))                       ;
            
            dLdgammadgammadgamma(j1,j2,j3) = first+second+third+fourth;
        end
    end
end


for j=1:m, operate11(j) = dgamma_dpsi1'*dLdgammadgammadgamma(:,:,j)*dgamma_dpsi1; end; operate11 = operate11';
for j=1:m, operate12(j) = dgamma_dpsi1'*dLdgammadgammadgamma(:,:,j)*dgamma_dpsi2; end; operate12 = operate12';
for j=1:m, operate21(j) = dgamma_dpsi2'*dLdgammadgammadgamma(:,:,j)*dgamma_dpsi1; end; operate21 = operate21';
for j=1:m, operate22(j) = dgamma_dpsi2'*dLdgammadgammadgamma(:,:,j)*dgamma_dpsi2; end; operate22 = operate22';

dgamma_dpsi1dpsi1 = - inv(H)*(dLdgammadpsi1dpsi1'+dLdgammadgammadpsi1*dgamma_dpsi1+dLdgammadgammadpsi1*dgamma_dpsi1+ operate11);
dgamma_dpsi1dpsi2 = - inv(H)*(dLdgammadpsi1dpsi2'+dLdgammadgammadpsi2*dgamma_dpsi1+dLdgammadgammadpsi1*dgamma_dpsi2+ operate21);
dgamma_dpsi2dpsi1 = - inv(H)*(dLdgammadpsi2dpsi1'+dLdgammadgammadpsi1*dgamma_dpsi2+dLdgammadgammadpsi2*dgamma_dpsi1+ operate12);
dgamma_dpsi2dpsi2 = - inv(H)*(dLdgammadpsi2dpsi2'+dLdgammadgammadpsi2*dgamma_dpsi2+dLdgammadgammadpsi2*dgamma_dpsi2+ operate22);

%dgamma_dpsi1dpsi1 = - inv(H)*(dLdgammadgammadpsi1*dgamma_dpsi1+dLdgammadgammadpsi1*dgamma_dpsi1+ operate11);
%dgamma_dpsi1dpsi2 = - inv(H)*(dLdgammadgammadpsi2*dgamma_dpsi1+dLdgammadgammadpsi1*dgamma_dpsi2+ operate21);
%dgamma_dpsi2dpsi1 = - inv(H)*(dLdgammadgammadpsi1*dgamma_dpsi2+dLdgammadgammadpsi2*dgamma_dpsi1+ operate12);
%dgamma_dpsi2dpsi2 = - inv(H)*(dLdgammadgammadpsi2*dgamma_dpsi2+dLdgammadgammadpsi2*dgamma_dpsi2+ operate22);



DalphaDpsi1 = dalpha_dgamma*dgamma_dpsi1 + dalpha_dpsi1;
DalphaDpsi2 = dalpha_dgamma*dgamma_dpsi2 + dalpha_dpsi2;

DalphaDpsi1Dpsi1 = sum(MUMU.*(XX{1}+DalphaDpsi1*ones(1,m)+ones(n-1,1)*dgamma_dpsi1'+ones(n-1,1)*dgamma_dpsi1dpsi1'),2)./sum(YY,2);
DalphaDpsi1Dpsi2 = sum(MUMU.*(XX{2}+DalphaDpsi2*ones(1,m)+ones(n-1,1)*dgamma_dpsi2'+ones(n-1,1)*dgamma_dpsi1dpsi2'),2)./sum(YY,2);
DalphaDpsi2Dpsi1 = sum(MUMU.*(XX{1}+DalphaDpsi1*ones(1,m)+ones(n-1,1)*dgamma_dpsi1'+ones(n-1,1)*dgamma_dpsi2dpsi1'),2)./sum(YY,2);
DalphaDpsi2Dpsi2 = sum(MUMU.*(XX{2}+DalphaDpsi2*ones(1,m)+ones(n-1,1)*dgamma_dpsi2'+ones(n-1,1)*dgamma_dpsi2dpsi2'),2)./sum(YY,2);


HH11 = -sum(sum(MU.*(X{1}+ones(n,1)*dgamma_dpsi1').^2))+sum(sum((Y-MU).*(ones(n,1)*dgamma_dpsi1dpsi1')))...
       -sum(sum(MUMU.*(DalphaDpsi1*ones(1,m)).*(XX{1}+ones(n-1,1)*dgamma_dpsi1')))...
       -sum(sum(MUMU.*(XX{1}+DalphaDpsi1*ones(1,m)+ones(n-1,1)*dgamma_dpsi1').*(DalphaDpsi1*ones(1,m))))...
       +sum(sum((YY-MUMU).*(DalphaDpsi1Dpsi1*ones(1,m)) ));
   
   
HH12 = -sum(sum(MU.*(X{1}+ones(n,1)*dgamma_dpsi1').*(X{2}+ones(n,1)*dgamma_dpsi2')))+sum(sum((Y-MU).*(ones(n,1)*dgamma_dpsi1dpsi2')))...
       -sum(sum(MUMU.*(DalphaDpsi1*ones(1,m)).*(XX{2}+ones(n-1,1)*dgamma_dpsi2')))...
       -sum(sum(MUMU.*(XX{2}+DalphaDpsi2*ones(1,m)+ones(n-1,1)*dgamma_dpsi2').*(DalphaDpsi1*ones(1,m))))...
       +sum(sum((YY-MUMU).*(DalphaDpsi1Dpsi2*ones(1,m)) ));  
   
HH21 = -sum(sum(MU.*(X{2}+ones(n,1)*dgamma_dpsi2').*(X{1}+ones(n,1)*dgamma_dpsi1')))+sum(sum((Y-MU).*(ones(n,1)*dgamma_dpsi2dpsi1')))...
       -sum(sum(MUMU.*(DalphaDpsi2*ones(1,m)).*(XX{1}+ones(n-1,1)*dgamma_dpsi1')))...
       -sum(sum(MUMU.*(XX{1}+DalphaDpsi1*ones(1,m)+ones(n-1,1)*dgamma_dpsi1').*(DalphaDpsi2*ones(1,m))))...
       +sum(sum((YY-MUMU).*(DalphaDpsi2Dpsi1*ones(1,m)) ));  

  
HH22 = -sum(sum(MU.*(X{2}+ones(n,1)*dgamma_dpsi2').^2))+sum(sum((Y-MU).*(ones(n,1)*dgamma_dpsi2dpsi2')))...
       -sum(sum(MUMU.*(DalphaDpsi2*ones(1,m)).*(XX{2}+ones(n-1,1)*dgamma_dpsi2')))...
       -sum(sum(MUMU.*(XX{2}+DalphaDpsi2*ones(1,m)+ones(n-1,1)*dgamma_dpsi2').*(DalphaDpsi2*ones(1,m))))...
       +sum(sum((YY-MUMU).*(DalphaDpsi2Dpsi2*ones(1,m)) ));

   
HH = [HH11, HH12; HH21, HH22];


var1 = zeros(n,m); 
v1 = (Y-MU).*(X{1}+ones(n,1)*dgamma_dpsi1'); v2 = (YY-MUMU).*(DalphaDpsi1*ones(1,m)); v3 = zeros(n,m); v3(2:end,:)=v2; var1 = v1+v3;

var2 = zeros(n,m); 
v1 = (Y-MU).*(X{2}+ones(n,1)*dgamma_dpsi2'); v2 = (YY-MUMU).*(DalphaDpsi2*ones(1,m)); v3 = zeros(n,m); v3(2:end,:)=v2; var2 = v1+v3;


V = zeros(2,2);
V(1,1) = mean(mean(var1.*var1,2));
V(1,2) = mean(mean(var1.*var2,2));
V(2,1) = mean(mean(var2.*var1,2));
V(2,2) = mean(mean(var2.*var2,2));



%likelihood = cell(1); likelihood{1} = zeros(1,1); likelihood{1} = L;
%score      = cell(1); score{1}      = zeros(K,1); score{1}      = S;
%Hessian    = cell(1); Hessian{1}    = zeros(K,K); Hessian{1}    =HH;

likelihood = L;
score      = S;
Hessian    =HH;

mVar = V; 

asyvar = inv(-HH/(n*m))*V*inv(-HH/(n*m)); Se = sqrt(diag(asyvar)/(n*m));
asyvar = asyvar/(n*m);




function [L S H] = FixedEffectLikelihoodGamma(gamma,Y,X,psi)
[n m] = size(Y); 
[K D] = size(psi);
I = X{1}*psi(1)+X{2}*psi(2);
% estimates of alpha given psi,gamma
YY = zeros(n-1,m);              YY    =     Y(2:end,:)   ;
XX =  cell(K  ,1); for k = 1:K, XX{k} =  X{k}(2:end,:)   ; end; II = XX{1}*psi(1)+XX{2}*psi(2);
alpha = log(sum(YY,2))-log(sum(exp(II+ones(n-1,1)*gamma),2)); alpha = [0; alpha]; % alpha_1 = 1 (normalized)
% likelihood, score vector and Hessian matrix for gamma given psi
L = Y.*(I+alpha*ones(1,m)+ones(n,1)*gamma)-exp(I+alpha*ones(1,m)+ones(n,1)*gamma); L = sum(sum(L));
S = Y-exp(I+alpha*ones(1,m)+ones(n,1)*gamma); S = sum(S)';

H = zeros(m,m);
for j1 = 1:m,
    for j2 = 1:m,
        cross = exp(II(:,j1)+alpha(2:end)+gamma(j1)).*exp(II(:,j2)+alpha(2:end)+gamma(j2))./sum(YY,2);
        own   = exp( I(:,j1)+alpha(1:end)+gamma(j1))                                                     ;

        H(j1,j2) = -(-sum(cross)+sum(own).*(j1==j2));
    end
end


%% Newton algorithm used for optimization
function [x condition it J]=Newton(FUN,x,varargin) % varargout
% maximises FUN, starting at x by Newton-Raphson method
tol=1e-5; maxit=100; smalleststep=.5^20;
it=1; condition=1; improvement=1; k=length(x);
[f g H J] =feval(FUN,x,varargin{:}); %varargout
while it<=maxit && condition==1 && improvement==1;
    [s1 s2]=size(H); if s1==s2 && s2>1 d=-pinv(H)*g; else d=-g./H; end      
    step=1; improvement=0;
    while step>=smalleststep && improvement==0;
        [ff gg HH JJ] =feval(FUN,x+step*d,varargin{:}); %varargout
        if (ff-f)/abs(f)>=-1e-5
            improvement=1; condition=sqrt(step*step*(d'*d))>tol & (ff-f)>tol;
            x=x+step*d; f=ff; g=gg; H=HH; J = JJ;
        else
            step=step/2;
        end
    end
    it=it+1;
end
it=it-1;

function [x f condition it]=NewtonRaphsonMax2(FUN,x,varargin) % varargout
% maximises FUN, starting at x by Newton-Raphson method
tol=1e-6; maxit=100; smalleststep=.5^20;
it=1; condition=1; improvement=1; k=length(x);
[f g H]=feval(FUN,x,varargin{:}); %varargout
while it<=maxit && condition==1 && improvement==1;
    d=-inv(H)*g;  step=1; improvement=0; update = x+step.*d';
    if isnan(update)==1, x = NaN*ones(k,1); continue; end
    while step>=smalleststep && improvement==0;
        [ff gg HH]=feval(FUN,x+step*d',varargin{:}); %varargout
        if (ff-f)/abs(f)>=-tol
            improvement=1; condition=sqrt(step*step*(d'*d))>tol & (ff-f)>tol;
            x=x+step*d'; f=ff; g=gg; H=HH;
        else
            step=step/2;
        end
    end
    it=it+1;
end
it=it-1;
